capture log close
clear all
set more off

* Establish working directory *************************************************
* The project root is taken from the global macro workdirectory, which must be
* defined before this do-file is run. All paths below are relative to it and
* use forward slashes so they work on every platform (a backslash is also
* Stata's macro escape character).
cd "$workdirectory"

*-------------------------------------------------------------------------------
* Share of each district (geolevel1) that falls inside each city (FUA)

* District areas.
* Observations without a geolevel1 code, and those coded 888888, are dropped.
import delimited "raw_datasets/Maps/area_dist.csv", encoding(UTF-8) clear
keep geolevel1 area_dist
drop if geolevel1 == . | geolevel1 == 888888
tempfile area_dist
save `area_dist', replace

* Intersection of FUAs and districts.
* (The ADMIN NAME variable was eliminated from the CSV because it generated
* conflicts when importing.)
import delimited "raw_datasets/Maps/intersection_adm1.csv", encoding(UTF-8) clear

* Attach the district area to every intersection record and keep only records
* found in both files.
merge m:1 geolevel1 using `area_dist', keep(match) nogenerate

* share = intersection area relative to the whole district area
* NOTE(review): assumes area_calcu and area_dist are in the same units - confirm.
gen share = area_calcu/area_dist
sort geolevel1
save "processed_datasets/dataset_maps_and_cities_adm1", replace


*-------------------------------------------------------------------------------
* Generating industry per city data
*
* Every .csv file in the folder below is imported and stacked into a single
* temporary dataset. The working directory is never changed: files are read
* through their path relative to the project root, so an error inside the loop
* cannot leave the session in the wrong folder.

local indgen_dir "raw_datasets/Maps/indgen_maps_adm1"

* Generating empty file to attach data to
clear all
tempfile data_indgen_cities_adm1
save `data_indgen_cities_adm1', emptyok replace

* Generating local with all .csv in the folder (file names only, no path)
local files : dir "`indgen_dir'" files "*.csv"

* Stop with a clear message instead of failing later on a missing variable
if `"`files'"' == "" {
  display as error "no .csv files found in `indgen_dir'"
  exit 601
}

* Importing, appending and saving
* Variable names are read from row 11 and data from row 12 onwards. The file
* name without its .csv extension is stored in country_year. The path is quoted
* so that file names containing spaces are handled.
foreach fi of local files {
  import delimited "`indgen_dir'/`fi'", varnames(11) rowrange(12) encoding(UTF-8) clear
  gen country_year = subinstr("`fi'",".csv","",.)
  append using `data_indgen_cities_adm1'
  save `data_indgen_cities_adm1', replace
}

* Cleaning: keep only the "Weighted N" rows and discard the column-total row
keep if v1 == "Weighted N"
drop v1
rename v2 census
drop if census == "COL TOTAL"

* Extracting geolevel1: the first run of digits found in the census label
gen geolevel1 = regexs(0) if(regexm(census, "[0-9]+"))
destring geolevel1, replace

sort geolevel1
save `data_indgen_cities_adm1', replace

*-------------------------------------------------------------------------------
* Generating final dataset

* Joining both datasets: every city-district intersection is paired with every
* industry record that has the same geolevel1.
use "processed_datasets/dataset_maps_and_cities_adm1", clear
joinby geolevel1 using `data_indgen_cities_adm1'

* Renaming industries to short names.
* construction, unknown and rowtotal already carry their final names, so they
* need no rename.
rename niunotinuniverse                  niu
rename agriculturefishingandforestry     agri
rename miningandextraction               mining
rename manufacturing                     mfg
rename electricitygaswaterandwastemanag  utilities
rename wholesaleandretailtrade           trade
rename hotelsandrestaurants              hospitality
rename transportationstorageandcommunic  transport
rename financialservicesandinsurance     fin_insu
rename publicadministrationanddefense    govmt
rename businessservicesandrealestate     bussserv_rs
rename education                         educ
rename healthandsocialwork               health
rename otherservices                     other_serv
rename privatehouseholdservices          house_serv
rename otherindustrynec                  other_industry
rename servicesnotspecified              serv_notsp
rename responsesuppressed                resp_supressed

* Single list of count variables, reused by both loops below
local industries niu agri mining mfg utilities construction trade hospitality transport fin_insu govmt bussserv_rs educ health other_serv house_serv unknown rowtotal other_industry serv_notsp resp_supressed

* Applying shares: each count is scaled by the district's area share and stored
* in a new variable prefixed with an underscore
foreach v of varlist `industries' {
	gen _`v' = `v'*share
}

* Fixing country names so both country-name columns use the same spelling
replace cntry_na_1 = "Lao People's Democratic Republic" if cntry_na_1 == "Laos"

* Drop intersections which associate districts from country i to a city located in country j
drop if cntry_name != cntry_na_1

* Collapsing by city and census: scaled counts are summed over districts
collapse (sum) _* (mean) fua_p_2015 (first) efua_name, by(efua_id country_year)

* Industry shares. The denominator is the row total net of the not-in-universe
* count, so niu and rowtotal themselves get no share variable.
local not_shared niu rowtotal
local sectors : list industries - not_shared
foreach s of local sectors {
	gen share_`s' = _`s'/(_rowtotal-_niu)
}

save "processed_datasets/dataset_maps_and_cities_adm1", replace
